7  Find Markers

Finds markers (differentially expressed genes) for each of the UMAP blobs

Published

August 11, 2025

suppressPackageStartupMessages({
    library(tidyverse)
    library(Seurat)
    library(kableExtra)
    })

load("data/20250620-seurat_integrated_UMAP_nFeat-ScTypeannotated.Rdata")

7.1 UMAP

Comparison of UMAPs previously annotated using ScType and clustered with Seurat.

d1 <- DimPlot(alldata, label = TRUE, repel = T, label.size = 5) + 
    NoLegend()

Idents(alldata) <- alldata$sctype_classification
d2 <- DimPlot(alldata, label = TRUE, repel = T, label.size = 4,) + 
    NoLegend() 

d1|d2

all_markers <- markers <- FindAllMarkers(alldata, only.pos = TRUE, densify = T)
# write_csv(all_markers, file = "data/20250728-all_markers.csv")

7.2 Top marker genes per cluster

7.2.1 Dot plot

Dot plot showing the scaled expression of the top five marker genes per cluster in the integrated dataset. Clusters correspond to Seurat-defined groups (seurat_clusters) and are ordered according to their assigned cell type (sctype_classification). The size of each dot represents the percentage of cells within a cluster expressing the gene, while the color intensity indicates the average expression level. Cluster labels (X-axis) are color-coded by sctype_classification to visually group clusters belonging to the same cell type.

markers <- read_csv("data/20250728-all_markers.csv")
top5 <- markers |> 
    filter(pct.1>0.3) |> 
    group_by(cluster) |> 
    top_n(5, avg_log2FC)



# Idents set to seurat_cluster
Idents(alldata) <- alldata$seurat_clusters

# Map seurat_clusters to sctype_classification
label_map <- alldata@meta.data  |> 
  select(seurat_clusters, sctype_classification) |> 
  distinct()

# Reorder seurat_cluster based on sctype_classification
seurat_clusters_ordered <- label_map |>
  arrange(sctype_classification) |>
  pull(seurat_clusters)

# Generate distinct colors automatically (1 color per sctype_classification)
n_classes <- length(unique(label_map$sctype_classification))
auto_colors <- setNames(
  scales::hue_pal()(n_classes),
  unique(label_map$sctype_classification)
)

# Make the DotPlot

Idents(alldata) = factor(alldata$seurat_clusters,
                         levels = seurat_clusters_ordered)
DotPlot(alldata, features = top5$gene) + 
    coord_flip()+
# Apply color to axis text
    theme(
  axis.text.x = element_text(
      size=14,
    colour = setNames(
      auto_colors[label_map$sctype_classification],
      label_map$seurat_cluster
    )[levels(Idents(alldata))]
  )
)

7.2.2 List of the top five marker genes for each Seurat cluster

kbl(top5) |> 
    kable_styling("striped") |> 
    scroll_box(height = "1000px")
p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
0 5.383311 0.960 0.331 0.00e+00 0 Bcl2a1b
0 4.897816 0.695 0.129 0.00e+00 0 Mir155hg
0 6.163070 0.748 0.331 0.00e+00 0 Osbpl8
0 5.619881 0.678 0.325 0.00e+00 0 Whrn
0 6.029228 0.711 0.361 0.00e+00 0 Pdgfb
0 6.011845 0.906 0.396 0.00e+00 1 Entpd1
0 7.829072 0.841 0.393 0.00e+00 1 Mertk
0 6.109155 0.667 0.250 0.00e+00 1 Adrb2
0 6.177441 0.771 0.366 0.00e+00 1 Fchsd2
0 6.543904 0.737 0.346 0.00e+00 1 Tbc1d9
0 3.940018 0.959 0.427 0.00e+00 2 Serpine2
0 4.273323 0.677 0.214 0.00e+00 2 Baiap2l2
0 3.120491 0.668 0.370 0.00e+00 2 P4ha1
0 3.465477 0.596 0.332 0.00e+00 2 Pfkfb3
0 5.673301 0.535 0.266 0.00e+00 2 Rab7b
0 6.686134 0.845 0.079 0.00e+00 3 Gm42756
0 6.653809 0.824 0.107 0.00e+00 3 Sox10
0 6.686665 0.846 0.197 0.00e+00 3 Usp54
0 6.984864 0.751 0.124 0.00e+00 3 Vldlr
0 6.891515 0.601 0.186 0.00e+00 3 5033421B08Rik
0 6.586443 0.962 0.292 0.00e+00 4 Gm10790
0 8.725423 0.853 0.202 0.00e+00 4 8030442B05Rik
0 8.813212 0.989 0.383 0.00e+00 4 P2ry12
0 8.095441 0.823 0.243 0.00e+00 4 A830008E24Rik
0 6.336412 0.917 0.356 0.00e+00 4 4933406I18Rik
0 12.962007 0.967 0.065 0.00e+00 5 Ocln
0 13.197439 0.884 0.015 0.00e+00 5 Ushbp1
0 13.078963 0.859 0.014 0.00e+00 5 Hspa12b
0 13.569486 0.929 0.089 0.00e+00 5 Thsd1
0 12.519384 0.891 0.285 0.00e+00 5 Fut8
0 5.305894 0.852 0.187 0.00e+00 6 Lgals3
0 7.534143 0.965 0.410 0.00e+00 6 Ftl1
0 6.665051 0.691 0.275 0.00e+00 6 Itga5
0 5.344663 0.687 0.286 0.00e+00 6 Fabp5
0 9.585140 0.532 0.310 0.00e+00 6 Fth1
0 8.127567 0.982 0.106 0.00e+00 7 Anln
0 8.686365 0.881 0.140 0.00e+00 7 Ppp2r2c
0 8.975226 0.910 0.190 0.00e+00 7 Sccpdh
0 8.860418 0.911 0.206 0.00e+00 7 Eml2
0 13.274086 0.808 0.253 0.00e+00 7 Plin3
0 6.084166 0.859 0.433 0.00e+00 8 Runx1
0 9.114216 0.887 0.495 0.00e+00 8 Basp1
0 6.960778 0.758 0.389 0.00e+00 8 Mir142hg
0 6.221935 0.527 0.308 0.00e+00 8 Creb5
0 6.377049 0.540 0.335 0.00e+00 8 Ifitm10
0 7.337465 0.858 0.089 0.00e+00 9 Serpina3n
0 7.531294 0.904 0.171 0.00e+00 9 Tmod1
0 8.824104 0.864 0.171 0.00e+00 9 Tspan15
0 10.654820 0.791 0.124 0.00e+00 9 Sh3gl3
0 7.370502 0.558 0.049 0.00e+00 9 Kit
0 6.669409 0.984 0.076 0.00e+00 10 H2-Aa
0 6.107818 0.965 0.060 0.00e+00 10 H2-Eb1
0 5.653954 0.980 0.176 0.00e+00 10 H2-Ab1
0 6.099866 0.995 0.207 0.00e+00 10 Cd74
0 6.207174 0.927 0.218 0.00e+00 10 Itgax
0 10.965876 0.855 0.010 0.00e+00 11 B230323A14Rik
0 11.208839 0.836 0.024 0.00e+00 11 Sfrp5
0 10.121493 0.824 0.153 0.00e+00 11 Fam228b
0 11.441386 0.821 0.170 0.00e+00 11 Rgma
0 12.556664 0.696 0.474 0.00e+00 11 Sparc
0 4.613013 0.755 0.234 0.00e+00 12 Gm15155
0 6.292254 0.973 0.466 0.00e+00 12 Plxdc2
0 6.879256 0.858 0.407 0.00e+00 12 Nrp1
0 4.688339 0.826 0.437 0.00e+00 12 Gpr34
0 4.622451 0.332 0.301 0.00e+00 12 Plcl1
0 16.714419 0.975 0.036 0.00e+00 13 Gldc
0 14.989121 0.934 0.039 0.00e+00 13 Grin2c
0 11.897805 0.929 0.045 0.00e+00 13 St6galnac5
0 14.997302 0.884 0.087 0.00e+00 13 4930488L21Rik
0 12.983120 0.998 0.264 0.00e+00 13 Tspan7
0 10.029174 0.914 0.122 0.00e+00 14 Mx1
0 9.720563 0.848 0.120 0.00e+00 14 A330040F15Rik
0 9.021241 0.849 0.128 0.00e+00 14 Rsad2
0 10.356806 0.988 0.285 0.00e+00 14 Ifi204
0 10.424149 0.921 0.422 0.00e+00 14 Pik3ap1
0 17.534114 0.983 0.003 0.00e+00 15 Cxcr6
0 19.458819 0.973 0.008 0.00e+00 15 Lck
0 16.470059 0.984 0.020 0.00e+00 15 Trac
0 16.619246 0.958 0.002 0.00e+00 15 Cd226
0 16.343388 0.948 0.025 0.00e+00 15 Runx3
0 13.051094 0.994 0.015 0.00e+00 16 Drc7
0 11.774016 0.991 0.025 0.00e+00 16 Prr32
0 11.639716 0.998 0.034 0.00e+00 16 Clic6
0 12.388454 0.998 0.203 0.00e+00 16 Ezr
0 12.746285 1.000 0.348 0.00e+00 16 Cab39l
0 12.246278 0.981 0.401 0.00e+00 17 Syngr1
0 9.275217 0.994 0.438 0.00e+00 17 Tyrobp
0 10.485866 0.997 0.506 0.00e+00 17 Rplp1
0 11.219707 0.991 0.501 0.00e+00 17 Tmsb4x
0 9.060232 0.990 0.529 0.00e+00 17 Fcer1g
0 18.534854 0.938 0.021 0.00e+00 18 Ryr2
0 19.407485 0.906 0.015 0.00e+00 18 Grin2a
0 21.165791 0.914 0.389 0.00e+00 18 Celf2
0 22.657484 0.762 0.009 0.00e+00 18 A230006K03Rik
0 21.655781 0.726 0.036 0.00e+00 18 Sncb
0 7.605571 0.992 0.030 0.00e+00 19 Slc6a20a
0 7.758149 0.994 0.054 0.00e+00 19 Atp13a5
0 7.764430 0.995 0.068 0.00e+00 19 Notch3
0 7.773113 1.000 0.074 0.00e+00 19 Vtn
0 8.832420 0.923 0.045 0.00e+00 19 Aspn
0 17.112431 0.985 0.018 0.00e+00 20 Sp9
0 16.538981 0.956 0.034 0.00e+00 20 Kcnb2
0 16.118414 0.976 0.306 0.00e+00 20 Pbx3
0 15.010216 0.932 0.021 0.00e+00 20 Epha6
0 16.698508 0.884 0.034 0.00e+00 20 Dlx2
0 7.129027 0.822 0.194 0.00e+00 21 Tmem178b
0 7.083616 0.748 0.238 0.00e+00 21 Nbas
0 6.807112 0.701 0.160 0.00e+00 21 Nkain1
0 6.915113 0.698 0.214 0.00e+00 21 Arhgef28
0 10.566937 0.521 0.285 0.00e+00 21 Nckap1
0 13.006318 0.907 0.012 0.00e+00 22 F13a1
0 13.960084 0.960 0.231 0.00e+00 22 Ms4a6c
0 12.783361 0.886 0.061 0.00e+00 22 Pf4
0 12.916844 0.803 0.247 0.00e+00 22 Stab1
0 13.608312 0.753 0.353 0.00e+00 22 Maf
0 12.109861 1.000 0.064 0.00e+00 23 Ndufa4l2
0 11.042306 1.000 0.070 0.00e+00 23 Egflam
0 11.110187 0.990 0.110 0.00e+00 23 Arhgap42
0 13.643806 0.990 0.255 0.00e+00 23 Ctdspl
0 10.951412 0.969 0.329 0.00e+00 23 Septin11
0 24.821567 1.000 0.167 0.00e+00 24 Cfap141
0 27.457694 0.996 0.003 0.00e+00 24 Sntn
0 25.591092 0.991 0.027 0.00e+00 24 Fam166b
0 29.195085 0.991 0.084 0.00e+00 24 Cdhr4
0 25.670129 0.957 0.024 0.00e+00 24 Stoml3
0 13.385614 0.970 0.090 0.00e+00 25 Pdgfra
0 13.573818 1.000 0.272 0.00e+00 25 Epn2
0 18.279946 0.970 0.290 0.00e+00 25 Itga9
0 14.885678 0.827 0.508 0.00e+00 25 Marcks
0 12.627057 0.523 0.246 4.70e-06 25 Agap1
0 11.521800 0.959 0.293 0.00e+00 26 Tpm1
0 12.845471 0.942 0.302 0.00e+00 26 Crim1
0 11.753684 0.884 0.075 0.00e+00 26 Rasd1
0 13.341255 0.901 0.348 0.00e+00 26 Rbpms
0 14.338538 0.709 0.319 0.00e+00 26 Slc38a2
0 8.995693 1.000 0.120 0.00e+00 27 Cdca3
0 9.018390 1.000 0.162 0.00e+00 27 Cdca8
0 8.773329 0.979 0.141 0.00e+00 27 Cenpe
0 8.967621 0.965 0.057 0.00e+00 27 Hmmr
0 9.105028 0.896 0.108 0.00e+00 27 Aspm
0 9.776108 1.000 0.197 0.00e+00 28 Grid2
0 10.209713 1.000 0.240 0.00e+00 28 Ust
0 9.873548 1.000 0.300 0.00e+00 28 Pals2
0 11.913883 0.993 0.277 0.00e+00 28 Ank2
0 9.693226 1.000 0.091 0.00e+00 28 Cap2
0 11.310231 1.000 0.051 0.00e+00 29 Otx2os1
0 9.001266 1.000 0.186 0.00e+00 29 Pip5k1b
0 8.989740 0.990 0.040 0.00e+00 29 Kcne2
0 10.779732 0.990 0.230 0.00e+00 29 Sh3d19
0 11.824433 0.958 0.037 0.00e+00 29 Atp2b3
0 7.039520 0.878 0.080 0.00e+00 30 Fgfr3
0 6.968599 0.796 0.064 0.00e+00 30 Agt
0 7.973467 0.816 0.189 0.00e+00 30 Rhou
0 6.571570 0.633 0.184 3.38e-05 30 Vegfa
0 7.359981 0.633 0.089 6.51e-05 30 Gabra2

7.2.3 Upset chart

The UpSet chart visualizes the overlaps in marker genes between the different Seurat clusters identified by FindAllMarkers (~Venn diagram for many sets).

  • Horizontal bars on the left show the total number of unique marker genes for each individual cluster.
  • Vertical bars at the top quantify the size of each intersection—that is, how many genes are shared by the specific combination of clusters highlighted by the connected black dots below.
  • Single black dots (no connecting lines) indicate marker genes unique to a single cluster.
  • Multiple connected dots indicate sets of clusters that share the same marker genes; the connecting lines trace which clusters are part of the intersection.
  • A taller vertical bar means a larger number of genes are shared in that particular combination.
  • Conversely, shorter bars correspond to smaller overlaps.

In biological terms,

  • Large overlaps between clusters may indicate closely related cell states or subtypes—perhaps reflecting transitional phenotypes, activation states, or technical resolution limits of clustering.
  • Small or absent overlaps suggest distinct transcriptional programs, supporting the idea that the clusters represent separate cell types or states.
library(UpSetR)

all_markers <- read_csv("/Users/arbones/Dropbox/SyncBriefcase/LAB/UK/scRNA_Akhil/Analysis/Seurat/data/20250728-all_markers.csv")

mark <- markers |> 
    group_by(cluster) |> 
    summarise(genes = list(unique(gene)))

# Convert to a named list
mark_list <- setNames(mark$genes, mark$cluster)
upset(fromList(mark_list),order.by = "freq", keep.order = T,
      nsets = 31, nintersects = 40,
      mainbar.y.label = "Common genes\nbetween clusters",
      sets.x.label = "Gene Markers per cluster",
      point.size = 3,
      mb.ratio = c(0.4,0.6),
      text.scale = 1.8)

7.2.4 Violin plots

Normalized expression values from the SCT assay in your Seurat object for the marker genes

The SCT assay stores log-normalized residual expression values from SCTransform. Values are continuous and typically centered around 0

library(gtable)
library(grid)
library(ggtext)
library(ggtext) # needed for colored axis labels

genes_of_interest <- c("P2ry12","Trem2","Clec7a","Mrc1","Cd3e","sct_Cd8a",
                       "Ptprc","sct_Aldh1l1","Aldoc","Mog",
                       "Olig2","Pdgfra","Ttr","Ccdc153","sct_Slc47a1","Flt1","Emcn",
                       "sct_Col1a2","sct_Bgn","Vtn","Kcnj8","Dcx","Snap25","Rbfox3")
violin_data <- FetchData(alldata, c(genes_of_interest,"seurat_clusters"), assay = "SCT")
violin_data_long <- violin_data  |> 
    pivot_longer(-seurat_clusters) |> 
    mutate(seurat_clusters = fct_rev(seurat_clusters),
           name = factor(name, levels = genes_of_interest))

# Get the default ggplot discrete colors
cluster_levels <- levels(violin_data_long$seurat_clusters)
palette_colors <- scales::hue_pal()(length(cluster_levels))
names(palette_colors) <- cluster_levels

# Create colored axis labels (HTML span with color)
axis_labels <- purrr::map2_chr(cluster_levels, palette_colors,
                               ~sprintf("<span style='color:%s'>%s</span>", .y, .x))

p <- ggplot(violin_data_long, aes(x = seurat_clusters, y = value, fill = seurat_clusters)) +
    geom_violin(aes(color = seurat_clusters),
                scale = "width", trim = TRUE, adjust = 1, show.legend = FALSE) +
    coord_flip() +
    scale_fill_manual(values = palette_colors) +
    scale_color_manual(values = palette_colors) +
    facet_wrap(~name, scales = "free_x", nrow = 1) +
    theme_minimal() +
    labs (x = "Cluster", y = "Expression Levels")+
    theme(
        legend.position = "left",
        axis.text.y = element_markdown(size = 12),
        axis.text.x = element_text(size = 6),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        strip.text = element_text(size=11, face = "italic")
    ) +
    scale_x_discrete(labels = axis_labels)

# Convert ggplot object to grob (graphical object)
g <- ggplotGrob(p)

# Find all the left axis grobs (y-axis elements)
axis_grobs <- which(grepl("axis-l", g$layout$name))

# Remove y-axis text from all but the first facet

invisible(
  map(axis_grobs, function(i) {
      # The first facet left axis is named 'axis-l-1-1'
    if (g$layout[i, "name"] != "axis-l-1-1") {
      g$grobs[[i]] <<- nullGrob()
    }
  })
)

# Draw the modified plot
grid.newpage()
grid.draw(g)